{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the people table from CSV, parsing the 'date' column as datetimes.\n",
    "people = pd.read_csv(\n",
    "    \"data/people.csv\",\n",
    "    parse_dates=['date'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>char_1</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2</th>\n",
       "      <th>date</th>\n",
       "      <th>char_3</th>\n",
       "      <th>char_4</th>\n",
       "      <th>char_5</th>\n",
       "      <th>char_6</th>\n",
       "      <th>char_7</th>\n",
       "      <th>char_8</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 11</td>\n",
       "      <td>type 2</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100002</th>\n",
       "      <td>type 2</td>\n",
       "      <td>group 8688</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>type 28</td>\n",
       "      <td>type 9</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 11</td>\n",
       "      <td>type 2</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100003</th>\n",
       "      <td>type 2</td>\n",
       "      <td>group 33592</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-06-10</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 8</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 2</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 2</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100004</th>\n",
       "      <td>type 2</td>\n",
       "      <td>group 22593</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-07-20</td>\n",
       "      <td>type 40</td>\n",
       "      <td>type 25</td>\n",
       "      <td>type 9</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 16</td>\n",
       "      <td>type 2</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100006</th>\n",
       "      <td>type 2</td>\n",
       "      <td>group 6534</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 40</td>\n",
       "      <td>type 25</td>\n",
       "      <td>type 9</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 8</td>\n",
       "      <td>type 2</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 40 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            char_1      group_1  char_2       date   char_3   char_4  char_5  \\\n",
       "people_id                                                                      \n",
       "ppl_100     type 2  group 17304  type 2 2021-06-29   type 5   type 5  type 5   \n",
       "ppl_100002  type 2   group 8688  type 3 2021-01-06  type 28   type 9  type 5   \n",
       "ppl_100003  type 2  group 33592  type 3 2022-06-10   type 4   type 8  type 5   \n",
       "ppl_100004  type 2  group 22593  type 3 2022-07-20  type 40  type 25  type 9   \n",
       "ppl_100006  type 2   group 6534  type 3 2022-07-27  type 40  type 25  type 9   \n",
       "\n",
       "            char_6   char_7  char_8   ...   char_29 char_30 char_31 char_32  \\\n",
       "people_id                             ...                                     \n",
       "ppl_100     type 3  type 11  type 2   ...     False    True    True   False   \n",
       "ppl_100002  type 3  type 11  type 2   ...     False    True    True    True   \n",
       "ppl_100003  type 2   type 5  type 2   ...     False   False    True    True   \n",
       "ppl_100004  type 4  type 16  type 2   ...      True    True    True    True   \n",
       "ppl_100006  type 3   type 8  type 2   ...     False   False    True   False   \n",
       "\n",
       "           char_33 char_34 char_35 char_36 char_37 char_38  \n",
       "people_id                                                   \n",
       "ppl_100      False    True    True    True   False      36  \n",
       "ppl_100002    True    True    True    True   False      76  \n",
       "ppl_100003    True    True   False    True    True      99  \n",
       "ppl_100004    True    True    True    True    True      76  \n",
       "ppl_100006   False   False    True    True   False      84  \n",
       "\n",
       "[5 rows x 40 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index the people table by people_id.\n",
    "# The guard keeps this cell re-runnable: once people_id has become the index\n",
    "# it is no longer a column, so calling set_index a second time would raise\n",
    "# KeyError. Rebinding (instead of inplace=True) also avoids mutating the frame\n",
    "# that the loading cell produced.\n",
    "if people.index.name != 'people_id':\n",
    "    people = people.set_index('people_id')\n",
    "people.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the act_train CSV (parsing 'date' as datetimes) and index the rows by\n",
    "# people_id in a single chained expression.\n",
    "act_train = (\n",
    "    pd.read_csv(\"data/act_train.csv\", parse_dates=['date'])\n",
    "    .set_index('people_id')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_1</th>\n",
       "      <th>char_2</th>\n",
       "      <th>char_3</th>\n",
       "      <th>char_4</th>\n",
       "      <th>char_5</th>\n",
       "      <th>char_6</th>\n",
       "      <th>char_7</th>\n",
       "      <th>char_8</th>\n",
       "      <th>char_9</th>\n",
       "      <th>char_10</th>\n",
       "      <th>outcome</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_1734928</td>\n",
       "      <td>2023-08-26</td>\n",
       "      <td>type 4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 76</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_2434093</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_3404049</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_3651215</td>\n",
       "      <td>2023-08-04</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_4109017</td>\n",
       "      <td>2023-08-26</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_898576</td>\n",
       "      <td>2023-08-04</td>\n",
       "      <td>type 4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1727</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100002</th>\n",
       "      <td>act2_1233489</td>\n",
       "      <td>2022-11-23</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100002</th>\n",
       "      <td>act2_1623405</td>\n",
       "      <td>2022-11-23</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100003</th>\n",
       "      <td>act2_1111598</td>\n",
       "      <td>2023-02-07</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100003</th>\n",
       "      <td>act2_1177453</td>\n",
       "      <td>2023-06-28</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id       date activity_category char_1 char_2 char_3  \\\n",
       "people_id                                                                    \n",
       "ppl_100     act2_1734928 2023-08-26            type 4    NaN    NaN    NaN   \n",
       "ppl_100     act2_2434093 2022-09-27            type 2    NaN    NaN    NaN   \n",
       "ppl_100     act2_3404049 2022-09-27            type 2    NaN    NaN    NaN   \n",
       "ppl_100     act2_3651215 2023-08-04            type 2    NaN    NaN    NaN   \n",
       "ppl_100     act2_4109017 2023-08-26            type 2    NaN    NaN    NaN   \n",
       "ppl_100      act2_898576 2023-08-04            type 4    NaN    NaN    NaN   \n",
       "ppl_100002  act2_1233489 2022-11-23            type 2    NaN    NaN    NaN   \n",
       "ppl_100002  act2_1623405 2022-11-23            type 2    NaN    NaN    NaN   \n",
       "ppl_100003  act2_1111598 2023-02-07            type 2    NaN    NaN    NaN   \n",
       "ppl_100003  act2_1177453 2023-06-28            type 2    NaN    NaN    NaN   \n",
       "\n",
       "           char_4 char_5 char_6 char_7 char_8 char_9    char_10  outcome  \n",
       "people_id                                                                 \n",
       "ppl_100       NaN    NaN    NaN    NaN    NaN    NaN    type 76        0  \n",
       "ppl_100       NaN    NaN    NaN    NaN    NaN    NaN     type 1        0  \n",
       "ppl_100       NaN    NaN    NaN    NaN    NaN    NaN     type 1        0  \n",
       "ppl_100       NaN    NaN    NaN    NaN    NaN    NaN     type 1        0  \n",
       "ppl_100       NaN    NaN    NaN    NaN    NaN    NaN     type 1        0  \n",
       "ppl_100       NaN    NaN    NaN    NaN    NaN    NaN  type 1727        0  \n",
       "ppl_100002    NaN    NaN    NaN    NaN    NaN    NaN     type 1        1  \n",
       "ppl_100002    NaN    NaN    NaN    NaN    NaN    NaN     type 1        1  \n",
       "ppl_100003    NaN    NaN    NaN    NaN    NaN    NaN     type 1        1  \n",
       "ppl_100003    NaN    NaN    NaN    NaN    NaN    NaN     type 1        1  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first ten rows of act_train (indexed by people_id).\n",
    "act_train.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the act_test CSV (parsing 'date' as datetimes) and index the rows by\n",
    "# people_id in a single chained expression.\n",
    "act_test = (\n",
    "    pd.read_csv(\"data/act_test.csv\", parse_dates=['date'])\n",
    "    .set_index('people_id')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_1</th>\n",
       "      <th>char_2</th>\n",
       "      <th>char_3</th>\n",
       "      <th>char_4</th>\n",
       "      <th>char_5</th>\n",
       "      <th>char_6</th>\n",
       "      <th>char_7</th>\n",
       "      <th>char_8</th>\n",
       "      <th>char_9</th>\n",
       "      <th>char_10</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100004</th>\n",
       "      <td>act1_249281</td>\n",
       "      <td>2022-07-20</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 10</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 7</td>\n",
       "      <td>type 4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100004</th>\n",
       "      <td>act2_230855</td>\n",
       "      <td>2022-07-20</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act1_240724</td>\n",
       "      <td>2022-10-14</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 12</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 13</td>\n",
       "      <td>type 10</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act1_83552</td>\n",
       "      <td>2022-11-27</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 20</td>\n",
       "      <td>type 10</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1043301</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 3015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_112890</td>\n",
       "      <td>2022-11-27</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 4987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1169930</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 3015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1924448</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 3015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1953554</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 3015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1971739</td>\n",
       "      <td>2022-11-28</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>type 3015</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id       date activity_category   char_1   char_2  \\\n",
       "people_id                                                                 \n",
       "ppl_100004   act1_249281 2022-07-20            type 1   type 5  type 10   \n",
       "ppl_100004   act2_230855 2022-07-20            type 5      NaN      NaN   \n",
       "ppl_10001    act1_240724 2022-10-14            type 1  type 12   type 1   \n",
       "ppl_10001     act1_83552 2022-11-27            type 1  type 20  type 10   \n",
       "ppl_10001   act2_1043301 2022-10-15            type 5      NaN      NaN   \n",
       "ppl_10001    act2_112890 2022-11-27            type 5      NaN      NaN   \n",
       "ppl_10001   act2_1169930 2022-10-15            type 5      NaN      NaN   \n",
       "ppl_10001   act2_1924448 2022-10-15            type 5      NaN      NaN   \n",
       "ppl_10001   act2_1953554 2022-10-15            type 5      NaN      NaN   \n",
       "ppl_10001   act2_1971739 2022-11-28            type 5      NaN      NaN   \n",
       "\n",
       "            char_3  char_4  char_5  char_6  char_7   char_8   char_9  \\\n",
       "people_id                                                              \n",
       "ppl_100004  type 5  type 1  type 6  type 1  type 1   type 7   type 4   \n",
       "ppl_100004     NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "ppl_10001   type 5  type 4  type 6  type 1  type 1  type 13  type 10   \n",
       "ppl_10001   type 5  type 4  type 6  type 1  type 1   type 5   type 5   \n",
       "ppl_10001      NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "ppl_10001      NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "ppl_10001      NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "ppl_10001      NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "ppl_10001      NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "ppl_10001      NaN     NaN     NaN     NaN     NaN      NaN      NaN   \n",
       "\n",
       "              char_10  \n",
       "people_id              \n",
       "ppl_100004        NaN  \n",
       "ppl_100004   type 682  \n",
       "ppl_10001         NaN  \n",
       "ppl_10001         NaN  \n",
       "ppl_10001   type 3015  \n",
       "ppl_10001   type 4987  \n",
       "ppl_10001   type 3015  \n",
       "ppl_10001   type 3015  \n",
       "ppl_10001   type 3015  \n",
       "ppl_10001   type 3015  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first ten rows of act_test (indexed by people_id).\n",
    "act_test.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_1_act</th>\n",
       "      <th>char_2_act</th>\n",
       "      <th>char_3_act</th>\n",
       "      <th>char_4_act</th>\n",
       "      <th>char_5_act</th>\n",
       "      <th>char_6_act</th>\n",
       "      <th>char_7_act</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_1734928</td>\n",
       "      <td>2023-08-26</td>\n",
       "      <td>type 4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_2434093</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_3404049</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_3651215</td>\n",
       "      <td>2023-08-04</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_4109017</td>\n",
       "      <td>2023-08-26</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_898576</td>\n",
       "      <td>2023-08-04</td>\n",
       "      <td>type 4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100002</th>\n",
       "      <td>act2_1233489</td>\n",
       "      <td>2022-11-23</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100002</th>\n",
       "      <td>act2_1623405</td>\n",
       "      <td>2022-11-23</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100003</th>\n",
       "      <td>act2_1111598</td>\n",
       "      <td>2023-02-07</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100003</th>\n",
       "      <td>act2_1177453</td>\n",
       "      <td>2023-06-28</td>\n",
       "      <td>type 2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 54 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act activity_category char_1_act char_2_act  \\\n",
       "people_id                                                                     \n",
       "ppl_100     act2_1734928 2023-08-26            type 4        NaN        NaN   \n",
       "ppl_100     act2_2434093 2022-09-27            type 2        NaN        NaN   \n",
       "ppl_100     act2_3404049 2022-09-27            type 2        NaN        NaN   \n",
       "ppl_100     act2_3651215 2023-08-04            type 2        NaN        NaN   \n",
       "ppl_100     act2_4109017 2023-08-26            type 2        NaN        NaN   \n",
       "ppl_100      act2_898576 2023-08-04            type 4        NaN        NaN   \n",
       "ppl_100002  act2_1233489 2022-11-23            type 2        NaN        NaN   \n",
       "ppl_100002  act2_1623405 2022-11-23            type 2        NaN        NaN   \n",
       "ppl_100003  act2_1111598 2023-02-07            type 2        NaN        NaN   \n",
       "ppl_100003  act2_1177453 2023-06-28            type 2        NaN        NaN   \n",
       "\n",
       "           char_3_act char_4_act char_5_act char_6_act char_7_act   ...    \\\n",
       "people_id                                                           ...     \n",
       "ppl_100           NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100           NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100           NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100           NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100           NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100           NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100002        NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100002        NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100003        NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_100003        NaN        NaN        NaN        NaN        NaN   ...     \n",
       "\n",
       "           char_29 char_30 char_31  char_32 char_33 char_34 char_35 char_36  \\\n",
       "people_id                                                                     \n",
       "ppl_100      False    True    True    False   False    True    True    True   \n",
       "ppl_100      False    True    True    False   False    True    True    True   \n",
       "ppl_100      False    True    True    False   False    True    True    True   \n",
       "ppl_100      False    True    True    False   False    True    True    True   \n",
       "ppl_100      False    True    True    False   False    True    True    True   \n",
       "ppl_100      False    True    True    False   False    True    True    True   \n",
       "ppl_100002   False    True    True     True    True    True    True    True   \n",
       "ppl_100002   False    True    True     True    True    True    True    True   \n",
       "ppl_100003   False   False    True     True    True    True   False    True   \n",
       "ppl_100003   False   False    True     True    True    True   False    True   \n",
       "\n",
       "           char_37 char_38  \n",
       "people_id                   \n",
       "ppl_100      False      36  \n",
       "ppl_100      False      36  \n",
       "ppl_100      False      36  \n",
       "ppl_100      False      36  \n",
       "ppl_100      False      36  \n",
       "ppl_100      False      36  \n",
       "ppl_100002   False      76  \n",
       "ppl_100002   False      76  \n",
       "ppl_100003    True      99  \n",
       "ppl_100003    True      99  \n",
       "\n",
       "[10 rows x 54 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-join the people table onto the training activities, matching rows on the index of both frames.\n",
    "# Column names that exist in both frames receive the '_act' / '_people' suffixes.\n",
    "train_data=pd.merge(\n",
    "    act_train,\n",
    "    people,\n",
    "    how='left',\n",
    "    left_index=True,\n",
    "    right_index=True,\n",
    "    suffixes=('_act', '_people')\n",
    ")\n",
    "train_data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_1_act</th>\n",
       "      <th>char_2_act</th>\n",
       "      <th>char_3_act</th>\n",
       "      <th>char_4_act</th>\n",
       "      <th>char_5_act</th>\n",
       "      <th>char_6_act</th>\n",
       "      <th>char_7_act</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100004</th>\n",
       "      <td>act1_249281</td>\n",
       "      <td>2022-07-20</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 10</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100004</th>\n",
       "      <td>act2_230855</td>\n",
       "      <td>2022-07-20</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act1_240724</td>\n",
       "      <td>2022-10-14</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 12</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act1_83552</td>\n",
       "      <td>2022-11-27</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 20</td>\n",
       "      <td>type 10</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1043301</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_112890</td>\n",
       "      <td>2022-11-27</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1169930</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1924448</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1953554</td>\n",
       "      <td>2022-10-15</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_10001</th>\n",
       "      <td>act2_1971739</td>\n",
       "      <td>2022-11-28</td>\n",
       "      <td>type 5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 53 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act activity_category char_1_act char_2_act  \\\n",
       "people_id                                                                     \n",
       "ppl_100004   act1_249281 2022-07-20            type 1     type 5    type 10   \n",
       "ppl_100004   act2_230855 2022-07-20            type 5        NaN        NaN   \n",
       "ppl_10001    act1_240724 2022-10-14            type 1    type 12     type 1   \n",
       "ppl_10001     act1_83552 2022-11-27            type 1    type 20    type 10   \n",
       "ppl_10001   act2_1043301 2022-10-15            type 5        NaN        NaN   \n",
       "ppl_10001    act2_112890 2022-11-27            type 5        NaN        NaN   \n",
       "ppl_10001   act2_1169930 2022-10-15            type 5        NaN        NaN   \n",
       "ppl_10001   act2_1924448 2022-10-15            type 5        NaN        NaN   \n",
       "ppl_10001   act2_1953554 2022-10-15            type 5        NaN        NaN   \n",
       "ppl_10001   act2_1971739 2022-11-28            type 5        NaN        NaN   \n",
       "\n",
       "           char_3_act char_4_act char_5_act char_6_act char_7_act   ...    \\\n",
       "people_id                                                           ...     \n",
       "ppl_100004     type 5     type 1     type 6     type 1     type 1   ...     \n",
       "ppl_100004        NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_10001      type 5     type 4     type 6     type 1     type 1   ...     \n",
       "ppl_10001      type 5     type 4     type 6     type 1     type 1   ...     \n",
       "ppl_10001         NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_10001         NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_10001         NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_10001         NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_10001         NaN        NaN        NaN        NaN        NaN   ...     \n",
       "ppl_10001         NaN        NaN        NaN        NaN        NaN   ...     \n",
       "\n",
       "           char_29 char_30 char_31 char_32 char_33 char_34 char_35 char_36  \\\n",
       "people_id                                                                    \n",
       "ppl_100004    True    True    True    True    True    True    True    True   \n",
       "ppl_100004    True    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "ppl_10001    False    True    True    True    True    True    True    True   \n",
       "\n",
       "           char_37 char_38  \n",
       "people_id                   \n",
       "ppl_100004    True      76  \n",
       "ppl_100004    True      76  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "ppl_10001     True      90  \n",
       "\n",
       "[10 rows x 53 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-join the people table onto the test activities, matching rows on the index of both frames.\n",
    "# Column names that exist in both frames receive the '_act' / '_people' suffixes.\n",
    "test_data=pd.merge(\n",
    "    act_test,\n",
    "    people,\n",
    "    how='left',\n",
    "    left_index=True,\n",
    "    right_index=True,\n",
    "    suffixes=('_act', '_people')\n",
    ")\n",
    "test_data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "type 2    904683\n",
       "type 5    490710\n",
       "type 3    429408\n",
       "type 4    207465\n",
       "type 1    157615\n",
       "type 6      4253\n",
       "type 7      3157\n",
       "Name: activity_category, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count per activity category in the merged training frame.\n",
    "train_data['activity_category'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['type 1']\n",
      "['type 1']\n",
      "['type 2']\n",
      "['type 2']\n",
      "['type 3']\n",
      "['type 3']\n",
      "['type 4']\n",
      "['type 4']\n",
      "['type 5']\n",
      "['type 5']\n",
      "['type 6']\n",
      "['type 6']\n",
      "['type 7']\n",
      "['type 7']\n"
     ]
    }
   ],
   "source": [
    "def _drop_empty(frame):\n",
    "    '''Return `frame` without rows, and then without columns, that hold only NaN.\n",
    "\n",
    "    Two single-axis calls replace dropna(axis=(0,1), how='all'): a tuple axis was\n",
    "    deprecated in pandas 0.23 and is rejected by pandas 1.0 and later. The tuple\n",
    "    form processed the axes one after the other, so the result is unchanged.\n",
    "    '''\n",
    "    return frame.dropna(axis=0, how='all').dropna(axis=1, how='all')\n",
    "\n",
    "# Build one train frame and one test frame per activity category, keyed by the category label.\n",
    "types=['type %d'%i for i in range(1,8)]\n",
    "train_datas={}\n",
    "test_datas={}\n",
    "for _type in types:\n",
    "    train_datas[_type]=_drop_empty(train_data[train_data.activity_category==_type])\n",
    "    test_datas[_type]=_drop_empty(test_data[test_data.activity_category==_type])\n",
    "    # Sanity check: each subset should list only its own category.\n",
    "    print(train_datas[_type].activity_category.unique())\n",
    "    print(test_datas[_type].activity_category.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_1_act</th>\n",
       "      <th>char_2_act</th>\n",
       "      <th>char_3_act</th>\n",
       "      <th>char_4_act</th>\n",
       "      <th>char_5_act</th>\n",
       "      <th>char_6_act</th>\n",
       "      <th>char_7_act</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100025</th>\n",
       "      <td>act1_9923</td>\n",
       "      <td>2022-11-25</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 3</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100033</th>\n",
       "      <td>act1_198174</td>\n",
       "      <td>2022-07-26</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 36</td>\n",
       "      <td>type 11</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 53 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            activity_id   date_act activity_category char_1_act char_2_act  \\\n",
       "people_id                                                                    \n",
       "ppl_100025    act1_9923 2022-11-25            type 1     type 3     type 5   \n",
       "ppl_100033  act1_198174 2022-07-26            type 1    type 36    type 11   \n",
       "\n",
       "           char_3_act char_4_act char_5_act char_6_act char_7_act   ...    \\\n",
       "people_id                                                           ...     \n",
       "ppl_100025     type 1     type 1     type 6     type 3     type 3   ...     \n",
       "ppl_100033     type 5     type 1     type 6     type 1     type 1   ...     \n",
       "\n",
       "           char_29 char_30  char_31 char_32 char_33 char_34 char_35 char_36  \\\n",
       "people_id                                                                     \n",
       "ppl_100025   False   False    False   False   False   False   False   False   \n",
       "ppl_100033   False   False    False   False   False   False   False   False   \n",
       "\n",
       "           char_37 char_38  \n",
       "people_id                   \n",
       "ppl_100025   False      76  \n",
       "ppl_100033   False       0  \n",
       "\n",
       "[2 rows x 53 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first two rows of the 'type 1' training subset.\n",
    "train_datas['type 1'].iloc[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_2434093</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 2</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_3404049</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 2</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 45 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            activity_id   date_act activity_category char_10_act  outcome  \\\n",
       "people_id                                                                   \n",
       "ppl_100    act2_2434093 2022-09-27            type 2      type 1        0   \n",
       "ppl_100    act2_3404049 2022-09-27            type 2      type 1        0   \n",
       "\n",
       "          char_1_people      group_1 char_2_people date_people char_3_people  \\\n",
       "people_id                                                                      \n",
       "ppl_100          type 2  group 17304        type 2  2021-06-29        type 5   \n",
       "ppl_100          type 2  group 17304        type 2  2021-06-29        type 5   \n",
       "\n",
       "            ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id   ...                                                             \n",
       "ppl_100     ...     False    True    True   False   False    True    True   \n",
       "ppl_100     ...     False    True    True   False   False    True    True   \n",
       "\n",
       "          char_36 char_37 char_38  \n",
       "people_id                          \n",
       "ppl_100      True   False      36  \n",
       "ppl_100      True   False      36  \n",
       "\n",
       "[2 rows x 45 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 2'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100006</th>\n",
       "      <td>act2_2380649</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 114</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 6534</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 40</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100019</th>\n",
       "      <td>act2_1681678</td>\n",
       "      <td>2023-03-26</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 2</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 45749</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-03-26</td>\n",
       "      <td>type 40</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 45 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act activity_category char_10_act  outcome  \\\n",
       "people_id                                                                    \n",
       "ppl_100006  act2_2380649 2022-07-27            type 3    type 114        1   \n",
       "ppl_100019  act2_1681678 2023-03-26            type 3      type 2        1   \n",
       "\n",
       "           char_1_people      group_1 char_2_people date_people char_3_people  \\\n",
       "people_id                                                                       \n",
       "ppl_100006        type 2   group 6534        type 3  2022-07-27       type 40   \n",
       "ppl_100019        type 2  group 45749        type 3  2023-03-26       type 40   \n",
       "\n",
       "             ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id    ...                                                             \n",
       "ppl_100006   ...     False   False    True   False   False   False    True   \n",
       "ppl_100019   ...     False   False   False   False   False   False   False   \n",
       "\n",
       "           char_36 char_37 char_38  \n",
       "people_id                           \n",
       "ppl_100006    True   False      84  \n",
       "ppl_100019   False   False      84  \n",
       "\n",
       "[2 rows x 45 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 3'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_1734928</td>\n",
       "      <td>2023-08-26</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 76</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_898576</td>\n",
       "      <td>2023-08-04</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 1727</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 45 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            activity_id   date_act activity_category char_10_act  outcome  \\\n",
       "people_id                                                                   \n",
       "ppl_100    act2_1734928 2023-08-26            type 4     type 76        0   \n",
       "ppl_100     act2_898576 2023-08-04            type 4   type 1727        0   \n",
       "\n",
       "          char_1_people      group_1 char_2_people date_people char_3_people  \\\n",
       "people_id                                                                      \n",
       "ppl_100          type 2  group 17304        type 2  2021-06-29        type 5   \n",
       "ppl_100          type 2  group 17304        type 2  2021-06-29        type 5   \n",
       "\n",
       "            ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id   ...                                                             \n",
       "ppl_100     ...     False    True    True   False   False    True    True   \n",
       "ppl_100     ...     False    True    True   False   False    True    True   \n",
       "\n",
       "          char_36 char_37 char_38  \n",
       "people_id                          \n",
       "ppl_100      True   False      36  \n",
       "ppl_100      True   False      36  \n",
       "\n",
       "[2 rows x 45 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 4'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100013</th>\n",
       "      <td>act2_1667803</td>\n",
       "      <td>2023-01-26</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5493</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 4204</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-01-24</td>\n",
       "      <td>type 4</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100013</th>\n",
       "      <td>act2_2229</td>\n",
       "      <td>2023-01-25</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5493</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 4204</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-01-24</td>\n",
       "      <td>type 4</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 45 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act activity_category char_10_act  outcome  \\\n",
       "people_id                                                                    \n",
       "ppl_100013  act2_1667803 2023-01-26            type 5   type 5493        1   \n",
       "ppl_100013     act2_2229 2023-01-25            type 5   type 5493        1   \n",
       "\n",
       "           char_1_people     group_1 char_2_people date_people char_3_people  \\\n",
       "people_id                                                                      \n",
       "ppl_100013        type 2  group 4204        type 3  2023-01-24        type 4   \n",
       "ppl_100013        type 2  group 4204        type 3  2023-01-24        type 4   \n",
       "\n",
       "             ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id    ...                                                             \n",
       "ppl_100013   ...     False   False    True    True    True    True   False   \n",
       "ppl_100013   ...     False   False    True    True    True    True   False   \n",
       "\n",
       "           char_36 char_37 char_38  \n",
       "people_id                           \n",
       "ppl_100013    True    True      91  \n",
       "ppl_100013    True    True      91  \n",
       "\n",
       "[2 rows x 45 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 5'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100175</th>\n",
       "      <td>act2_4192771</td>\n",
       "      <td>2023-02-28</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 110</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 18966</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-02-27</td>\n",
       "      <td>type 7</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100202</th>\n",
       "      <td>act2_4786070</td>\n",
       "      <td>2023-02-25</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 110</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 38766</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-10-22</td>\n",
       "      <td>type 15</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>89</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 45 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act activity_category char_10_act  outcome  \\\n",
       "people_id                                                                    \n",
       "ppl_100175  act2_4192771 2023-02-28            type 6    type 110        1   \n",
       "ppl_100202  act2_4786070 2023-02-25            type 6    type 110        1   \n",
       "\n",
       "           char_1_people      group_1 char_2_people date_people char_3_people  \\\n",
       "people_id                                                                       \n",
       "ppl_100175        type 2  group 18966        type 3  2023-02-27        type 7   \n",
       "ppl_100202        type 2  group 38766        type 3  2022-10-22       type 15   \n",
       "\n",
       "             ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id    ...                                                             \n",
       "ppl_100175   ...      True   False    True    True   False    True   False   \n",
       "ppl_100202   ...     False   False    True    True    True    True    True   \n",
       "\n",
       "           char_36 char_37 char_38  \n",
       "people_id                           \n",
       "ppl_100175   False   False      93  \n",
       "ppl_100202    True    True      89  \n",
       "\n",
       "[2 rows x 45 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 6'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>activity_category</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100133</th>\n",
       "      <td>act2_1723517</td>\n",
       "      <td>2022-11-30</td>\n",
       "      <td>type 7</td>\n",
       "      <td>type 194</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2022-11-29</td>\n",
       "      <td>type 4</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100212</th>\n",
       "      <td>act2_450045</td>\n",
       "      <td>2023-06-21</td>\n",
       "      <td>type 7</td>\n",
       "      <td>type 194</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 31542</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-06-20</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 45 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act activity_category char_10_act  outcome  \\\n",
       "people_id                                                                    \n",
       "ppl_100133  act2_1723517 2022-11-30            type 7    type 194        0   \n",
       "ppl_100212   act2_450045 2023-06-21            type 7    type 194        1   \n",
       "\n",
       "           char_1_people      group_1 char_2_people date_people char_3_people  \\\n",
       "people_id                                                                       \n",
       "ppl_100133        type 2  group 17304        type 2  2022-11-29        type 4   \n",
       "ppl_100212        type 2  group 31542        type 3  2023-06-20        type 5   \n",
       "\n",
       "             ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id    ...                                                             \n",
       "ppl_100133   ...     False   False   False   False   False   False   False   \n",
       "ppl_100212   ...     False   False    True   False    True   False   False   \n",
       "\n",
       "           char_36 char_37 char_38  \n",
       "people_id                           \n",
       "ppl_100133   False   False       7  \n",
       "ppl_100212   False   False      87  \n",
       "\n",
       "[2 rows x 45 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 7'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    train_datas[_type].drop(['activity_category'],axis=1,inplace=True)\n",
    "    test_datas[_type].drop(['activity_category'],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_1_act</th>\n",
       "      <th>char_2_act</th>\n",
       "      <th>char_3_act</th>\n",
       "      <th>char_4_act</th>\n",
       "      <th>char_5_act</th>\n",
       "      <th>char_6_act</th>\n",
       "      <th>char_7_act</th>\n",
       "      <th>char_8_act</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100025</th>\n",
       "      <td>act1_9923</td>\n",
       "      <td>2022-11-25</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 6</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100033</th>\n",
       "      <td>act1_198174</td>\n",
       "      <td>2022-07-26</td>\n",
       "      <td>type 36</td>\n",
       "      <td>type 11</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 4</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 52 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            activity_id   date_act char_1_act char_2_act char_3_act  \\\n",
       "people_id                                                             \n",
       "ppl_100025    act1_9923 2022-11-25     type 3     type 5     type 1   \n",
       "ppl_100033  act1_198174 2022-07-26    type 36    type 11     type 5   \n",
       "\n",
       "           char_4_act char_5_act char_6_act char_7_act char_8_act   ...    \\\n",
       "people_id                                                           ...     \n",
       "ppl_100025     type 1     type 6     type 3     type 3     type 6   ...     \n",
       "ppl_100033     type 1     type 6     type 1     type 1     type 4   ...     \n",
       "\n",
       "           char_29  char_30 char_31 char_32 char_33 char_34 char_35 char_36  \\\n",
       "people_id                                                                     \n",
       "ppl_100025   False    False   False   False   False   False   False   False   \n",
       "ppl_100033   False    False   False   False   False   False   False   False   \n",
       "\n",
       "           char_37 char_38  \n",
       "people_id                   \n",
       "ppl_100025   False      76  \n",
       "ppl_100033   False       0  \n",
       "\n",
       "[2 rows x 52 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 1'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>char_4_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_2434093</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100</th>\n",
       "      <td>act2_3404049</td>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 44 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            activity_id   date_act char_10_act  outcome char_1_people  \\\n",
       "people_id                                                               \n",
       "ppl_100    act2_2434093 2022-09-27      type 1        0        type 2   \n",
       "ppl_100    act2_3404049 2022-09-27      type 1        0        type 2   \n",
       "\n",
       "               group_1 char_2_people date_people char_3_people char_4_people  \\\n",
       "people_id                                                                      \n",
       "ppl_100    group 17304        type 2  2021-06-29        type 5        type 5   \n",
       "ppl_100    group 17304        type 2  2021-06-29        type 5        type 5   \n",
       "\n",
       "            ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id   ...                                                             \n",
       "ppl_100     ...     False    True    True   False   False    True    True   \n",
       "ppl_100     ...     False    True    True   False   False    True    True   \n",
       "\n",
       "          char_36 char_37 char_38  \n",
       "people_id                          \n",
       "ppl_100      True   False      36  \n",
       "ppl_100      True   False      36  \n",
       "\n",
       "[2 rows x 44 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 2'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>activity_id</th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>char_4_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100006</th>\n",
       "      <td>act2_2380649</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 114</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 6534</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 40</td>\n",
       "      <td>type 25</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100019</th>\n",
       "      <td>act2_1681678</td>\n",
       "      <td>2023-03-26</td>\n",
       "      <td>type 2</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 45749</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-03-26</td>\n",
       "      <td>type 40</td>\n",
       "      <td>type 25</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 44 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             activity_id   date_act char_10_act  outcome char_1_people  \\\n",
       "people_id                                                                \n",
       "ppl_100006  act2_2380649 2022-07-27    type 114        1        type 2   \n",
       "ppl_100019  act2_1681678 2023-03-26      type 2        1        type 2   \n",
       "\n",
       "                group_1 char_2_people date_people char_3_people char_4_people  \\\n",
       "people_id                                                                       \n",
       "ppl_100006   group 6534        type 3  2022-07-27       type 40       type 25   \n",
       "ppl_100019  group 45749        type 3  2023-03-26       type 40       type 25   \n",
       "\n",
       "             ...   char_29 char_30 char_31 char_32 char_33 char_34 char_35  \\\n",
       "people_id    ...                                                             \n",
       "ppl_100006   ...     False   False    True   False   False   False    True   \n",
       "ppl_100019   ...     False   False   False   False   False   False   False   \n",
       "\n",
       "           char_36 char_37 char_38  \n",
       "people_id                           \n",
       "ppl_100006    True   False      84  \n",
       "ppl_100019   False   False      84  \n",
       "\n",
       "[2 rows x 44 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 3'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    train_datas[_type].set_index(keys=['activity_id'], drop=True, append=True, inplace=True)\n",
    "    test_datas[_type].set_index(keys=['activity_id'], drop=True, append=True, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_1_act</th>\n",
       "      <th>char_2_act</th>\n",
       "      <th>char_3_act</th>\n",
       "      <th>char_4_act</th>\n",
       "      <th>char_5_act</th>\n",
       "      <th>char_6_act</th>\n",
       "      <th>char_7_act</th>\n",
       "      <th>char_8_act</th>\n",
       "      <th>char_9_act</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th>activity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100025</th>\n",
       "      <th>act1_9923</th>\n",
       "      <td>2022-11-25</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 3</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 8</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100033</th>\n",
       "      <th>act1_198174</th>\n",
       "      <td>2022-07-26</td>\n",
       "      <td>type 36</td>\n",
       "      <td>type 11</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 6</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 1</td>\n",
       "      <td>type 4</td>\n",
       "      <td>type 1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                         date_act char_1_act char_2_act char_3_act char_4_act  \\\n",
       "people_id  activity_id                                                          \n",
       "ppl_100025 act1_9923   2022-11-25     type 3     type 5     type 1     type 1   \n",
       "ppl_100033 act1_198174 2022-07-26    type 36    type 11     type 5     type 1   \n",
       "\n",
       "                       char_5_act char_6_act char_7_act char_8_act char_9_act  \\\n",
       "people_id  activity_id                                                          \n",
       "ppl_100025 act1_9923       type 6     type 3     type 3     type 6     type 8   \n",
       "ppl_100033 act1_198174     type 6     type 1     type 1     type 4     type 1   \n",
       "\n",
       "                         ...    char_29 char_30 char_31 char_32 char_33  \\\n",
       "people_id  activity_id   ...                                              \n",
       "ppl_100025 act1_9923     ...      False   False   False   False   False   \n",
       "ppl_100033 act1_198174   ...      False   False   False   False   False   \n",
       "\n",
       "                       char_34 char_35 char_36 char_37 char_38  \n",
       "people_id  activity_id                                          \n",
       "ppl_100025 act1_9923     False   False   False   False      76  \n",
       "ppl_100033 act1_198174   False   False   False   False       0  \n",
       "\n",
       "[2 rows x 51 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 1'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>char_4_people</th>\n",
       "      <th>char_5_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th>activity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">ppl_100</th>\n",
       "      <th>act2_2434093</th>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>act2_3404049</th>\n",
       "      <td>2022-09-27</td>\n",
       "      <td>type 1</td>\n",
       "      <td>0</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 17304</td>\n",
       "      <td>type 2</td>\n",
       "      <td>2021-06-29</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>type 5</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 43 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                         date_act char_10_act  outcome char_1_people  \\\n",
       "people_id activity_id                                                  \n",
       "ppl_100   act2_2434093 2022-09-27      type 1        0        type 2   \n",
       "          act2_3404049 2022-09-27      type 1        0        type 2   \n",
       "\n",
       "                            group_1 char_2_people date_people char_3_people  \\\n",
       "people_id activity_id                                                         \n",
       "ppl_100   act2_2434093  group 17304        type 2  2021-06-29        type 5   \n",
       "          act2_3404049  group 17304        type 2  2021-06-29        type 5   \n",
       "\n",
       "                       char_4_people char_5_people   ...   char_29 char_30  \\\n",
       "people_id activity_id                                ...                     \n",
       "ppl_100   act2_2434093        type 5        type 5   ...     False    True   \n",
       "          act2_3404049        type 5        type 5   ...     False    True   \n",
       "\n",
       "                       char_31 char_32 char_33 char_34 char_35 char_36  \\\n",
       "people_id activity_id                                                    \n",
       "ppl_100   act2_2434093    True   False   False    True    True    True   \n",
       "          act2_3404049    True   False   False    True    True    True   \n",
       "\n",
       "                       char_37 char_38  \n",
       "people_id activity_id                   \n",
       "ppl_100   act2_2434093   False      36  \n",
       "          act2_3404049   False      36  \n",
       "\n",
       "[2 rows x 43 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 2'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_10_act</th>\n",
       "      <th>outcome</th>\n",
       "      <th>char_1_people</th>\n",
       "      <th>group_1</th>\n",
       "      <th>char_2_people</th>\n",
       "      <th>date_people</th>\n",
       "      <th>char_3_people</th>\n",
       "      <th>char_4_people</th>\n",
       "      <th>char_5_people</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th>activity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100006</th>\n",
       "      <th>act2_2380649</th>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 114</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 6534</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2022-07-27</td>\n",
       "      <td>type 40</td>\n",
       "      <td>type 25</td>\n",
       "      <td>type 9</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ppl_100019</th>\n",
       "      <th>act2_1681678</th>\n",
       "      <td>2023-03-26</td>\n",
       "      <td>type 2</td>\n",
       "      <td>1</td>\n",
       "      <td>type 2</td>\n",
       "      <td>group 45749</td>\n",
       "      <td>type 3</td>\n",
       "      <td>2023-03-26</td>\n",
       "      <td>type 40</td>\n",
       "      <td>type 25</td>\n",
       "      <td>type 9</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 43 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          date_act char_10_act  outcome char_1_people  \\\n",
       "people_id  activity_id                                                  \n",
       "ppl_100006 act2_2380649 2022-07-27    type 114        1        type 2   \n",
       "ppl_100019 act2_1681678 2023-03-26      type 2        1        type 2   \n",
       "\n",
       "                             group_1 char_2_people date_people char_3_people  \\\n",
       "people_id  activity_id                                                         \n",
       "ppl_100006 act2_2380649   group 6534        type 3  2022-07-27       type 40   \n",
       "ppl_100019 act2_1681678  group 45749        type 3  2023-03-26       type 40   \n",
       "\n",
       "                        char_4_people char_5_people   ...   char_29 char_30  \\\n",
       "people_id  activity_id                                ...                     \n",
       "ppl_100006 act2_2380649       type 25        type 9   ...     False   False   \n",
       "ppl_100019 act2_1681678       type 25        type 9   ...     False   False   \n",
       "\n",
       "                        char_31 char_32 char_33 char_34 char_35 char_36  \\\n",
       "people_id  activity_id                                                    \n",
       "ppl_100006 act2_2380649    True   False   False   False    True    True   \n",
       "ppl_100019 act2_1681678   False   False   False   False   False   False   \n",
       "\n",
       "                        char_37 char_38  \n",
       "people_id  activity_id                   \n",
       "ppl_100006 act2_2380649   False      84  \n",
       "ppl_100019 act2_1681678   False      84  \n",
       "\n",
       "[2 rows x 43 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 3'].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True,True,True,True,True,True,True,True,True,True,True,True,True,True,"
     ]
    }
   ],
   "source": [
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    print(train_datas[_type].index.is_unique,end=',')\n",
    "    print(test_datas[_type].index.is_unique,end=',' )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>test_1</th>\n",
       "      <th>test_2</th>\n",
       "      <th>test_3</th>\n",
       "      <th>test_4</th>\n",
       "      <th>test_5</th>\n",
       "      <th>test_6</th>\n",
       "      <th>test_7</th>\n",
       "      <th>train_1</th>\n",
       "      <th>train_2</th>\n",
       "      <th>train_3</th>\n",
       "      <th>train_4</th>\n",
       "      <th>train_5</th>\n",
       "      <th>train_6</th>\n",
       "      <th>train_7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>char_10_act</th>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_10_people</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_11</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_12</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_13</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_14</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_15</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_16</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_17</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_18</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_19</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_1_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_1_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_20</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_21</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_22</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_23</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_24</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_25</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_26</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_27</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_28</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_29</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_2_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_2_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_30</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_31</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_32</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_33</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_34</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_35</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_36</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_37</th>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "      <td>bool</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_38</th>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_3_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_3_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_4_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_4_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_5_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_5_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_6_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_6_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_7_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_7_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_8_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_8_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_9_act</th>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>object</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_9_people</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date_act</th>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date_people</th>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "      <td>datetime64[ns]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>group_1</th>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "      <td>object</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>outcome</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "      <td>int64</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        test_1          test_2          test_3  \\\n",
       "char_10_act                NaN          object          object   \n",
       "char_10_people            bool            bool            bool   \n",
       "char_11                   bool            bool            bool   \n",
       "char_12                   bool            bool            bool   \n",
       "char_13                   bool            bool            bool   \n",
       "char_14                   bool            bool            bool   \n",
       "char_15                   bool            bool            bool   \n",
       "char_16                   bool            bool            bool   \n",
       "char_17                   bool            bool            bool   \n",
       "char_18                   bool            bool            bool   \n",
       "char_19                   bool            bool            bool   \n",
       "char_1_act              object             NaN             NaN   \n",
       "char_1_people           object          object          object   \n",
       "char_20                   bool            bool            bool   \n",
       "char_21                   bool            bool            bool   \n",
       "char_22                   bool            bool            bool   \n",
       "char_23                   bool            bool            bool   \n",
       "char_24                   bool            bool            bool   \n",
       "char_25                   bool            bool            bool   \n",
       "char_26                   bool            bool            bool   \n",
       "char_27                   bool            bool            bool   \n",
       "char_28                   bool            bool            bool   \n",
       "char_29                   bool            bool            bool   \n",
       "char_2_act              object             NaN             NaN   \n",
       "char_2_people           object          object          object   \n",
       "char_30                   bool            bool            bool   \n",
       "char_31                   bool            bool            bool   \n",
       "char_32                   bool            bool            bool   \n",
       "char_33                   bool            bool            bool   \n",
       "char_34                   bool            bool            bool   \n",
       "char_35                   bool            bool            bool   \n",
       "char_36                   bool            bool            bool   \n",
       "char_37                   bool            bool            bool   \n",
       "char_38                  int64           int64           int64   \n",
       "char_3_act              object             NaN             NaN   \n",
       "char_3_people           object          object          object   \n",
       "char_4_act              object             NaN             NaN   \n",
       "char_4_people           object          object          object   \n",
       "char_5_act              object             NaN             NaN   \n",
       "char_5_people           object          object          object   \n",
       "char_6_act              object             NaN             NaN   \n",
       "char_6_people           object          object          object   \n",
       "char_7_act              object             NaN             NaN   \n",
       "char_7_people           object          object          object   \n",
       "char_8_act              object             NaN             NaN   \n",
       "char_8_people           object          object          object   \n",
       "char_9_act              object             NaN             NaN   \n",
       "char_9_people           object          object          object   \n",
       "date_act        datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "date_people     datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "group_1                 object          object          object   \n",
       "outcome                    NaN             NaN             NaN   \n",
       "\n",
       "                        test_4          test_5          test_6  \\\n",
       "char_10_act             object          object          object   \n",
       "char_10_people            bool            bool            bool   \n",
       "char_11                   bool            bool            bool   \n",
       "char_12                   bool            bool            bool   \n",
       "char_13                   bool            bool            bool   \n",
       "char_14                   bool            bool            bool   \n",
       "char_15                   bool            bool            bool   \n",
       "char_16                   bool            bool            bool   \n",
       "char_17                   bool            bool            bool   \n",
       "char_18                   bool            bool            bool   \n",
       "char_19                   bool            bool            bool   \n",
       "char_1_act                 NaN             NaN             NaN   \n",
       "char_1_people           object          object          object   \n",
       "char_20                   bool            bool            bool   \n",
       "char_21                   bool            bool            bool   \n",
       "char_22                   bool            bool            bool   \n",
       "char_23                   bool            bool            bool   \n",
       "char_24                   bool            bool            bool   \n",
       "char_25                   bool            bool            bool   \n",
       "char_26                   bool            bool            bool   \n",
       "char_27                   bool            bool            bool   \n",
       "char_28                   bool            bool            bool   \n",
       "char_29                   bool            bool            bool   \n",
       "char_2_act                 NaN             NaN             NaN   \n",
       "char_2_people           object          object          object   \n",
       "char_30                   bool            bool            bool   \n",
       "char_31                   bool            bool            bool   \n",
       "char_32                   bool            bool            bool   \n",
       "char_33                   bool            bool            bool   \n",
       "char_34                   bool            bool            bool   \n",
       "char_35                   bool            bool            bool   \n",
       "char_36                   bool            bool            bool   \n",
       "char_37                   bool            bool            bool   \n",
       "char_38                  int64           int64           int64   \n",
       "char_3_act                 NaN             NaN             NaN   \n",
       "char_3_people           object          object          object   \n",
       "char_4_act                 NaN             NaN             NaN   \n",
       "char_4_people           object          object          object   \n",
       "char_5_act                 NaN             NaN             NaN   \n",
       "char_5_people           object          object          object   \n",
       "char_6_act                 NaN             NaN             NaN   \n",
       "char_6_people           object          object          object   \n",
       "char_7_act                 NaN             NaN             NaN   \n",
       "char_7_people           object          object          object   \n",
       "char_8_act                 NaN             NaN             NaN   \n",
       "char_8_people           object          object          object   \n",
       "char_9_act                 NaN             NaN             NaN   \n",
       "char_9_people           object          object          object   \n",
       "date_act        datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "date_people     datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "group_1                 object          object          object   \n",
       "outcome                    NaN             NaN             NaN   \n",
       "\n",
       "                        test_7         train_1         train_2  \\\n",
       "char_10_act             object             NaN          object   \n",
       "char_10_people            bool            bool            bool   \n",
       "char_11                   bool            bool            bool   \n",
       "char_12                   bool            bool            bool   \n",
       "char_13                   bool            bool            bool   \n",
       "char_14                   bool            bool            bool   \n",
       "char_15                   bool            bool            bool   \n",
       "char_16                   bool            bool            bool   \n",
       "char_17                   bool            bool            bool   \n",
       "char_18                   bool            bool            bool   \n",
       "char_19                   bool            bool            bool   \n",
       "char_1_act                 NaN          object             NaN   \n",
       "char_1_people           object          object          object   \n",
       "char_20                   bool            bool            bool   \n",
       "char_21                   bool            bool            bool   \n",
       "char_22                   bool            bool            bool   \n",
       "char_23                   bool            bool            bool   \n",
       "char_24                   bool            bool            bool   \n",
       "char_25                   bool            bool            bool   \n",
       "char_26                   bool            bool            bool   \n",
       "char_27                   bool            bool            bool   \n",
       "char_28                   bool            bool            bool   \n",
       "char_29                   bool            bool            bool   \n",
       "char_2_act                 NaN          object             NaN   \n",
       "char_2_people           object          object          object   \n",
       "char_30                   bool            bool            bool   \n",
       "char_31                   bool            bool            bool   \n",
       "char_32                   bool            bool            bool   \n",
       "char_33                   bool            bool            bool   \n",
       "char_34                   bool            bool            bool   \n",
       "char_35                   bool            bool            bool   \n",
       "char_36                   bool            bool            bool   \n",
       "char_37                   bool            bool            bool   \n",
       "char_38                  int64           int64           int64   \n",
       "char_3_act                 NaN          object             NaN   \n",
       "char_3_people           object          object          object   \n",
       "char_4_act                 NaN          object             NaN   \n",
       "char_4_people           object          object          object   \n",
       "char_5_act                 NaN          object             NaN   \n",
       "char_5_people           object          object          object   \n",
       "char_6_act                 NaN          object             NaN   \n",
       "char_6_people           object          object          object   \n",
       "char_7_act                 NaN          object             NaN   \n",
       "char_7_people           object          object          object   \n",
       "char_8_act                 NaN          object             NaN   \n",
       "char_8_people           object          object          object   \n",
       "char_9_act                 NaN          object             NaN   \n",
       "char_9_people           object          object          object   \n",
       "date_act        datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "date_people     datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "group_1                 object          object          object   \n",
       "outcome                    NaN           int64           int64   \n",
       "\n",
       "                       train_3         train_4         train_5  \\\n",
       "char_10_act             object          object          object   \n",
       "char_10_people            bool            bool            bool   \n",
       "char_11                   bool            bool            bool   \n",
       "char_12                   bool            bool            bool   \n",
       "char_13                   bool            bool            bool   \n",
       "char_14                   bool            bool            bool   \n",
       "char_15                   bool            bool            bool   \n",
       "char_16                   bool            bool            bool   \n",
       "char_17                   bool            bool            bool   \n",
       "char_18                   bool            bool            bool   \n",
       "char_19                   bool            bool            bool   \n",
       "char_1_act                 NaN             NaN             NaN   \n",
       "char_1_people           object          object          object   \n",
       "char_20                   bool            bool            bool   \n",
       "char_21                   bool            bool            bool   \n",
       "char_22                   bool            bool            bool   \n",
       "char_23                   bool            bool            bool   \n",
       "char_24                   bool            bool            bool   \n",
       "char_25                   bool            bool            bool   \n",
       "char_26                   bool            bool            bool   \n",
       "char_27                   bool            bool            bool   \n",
       "char_28                   bool            bool            bool   \n",
       "char_29                   bool            bool            bool   \n",
       "char_2_act                 NaN             NaN             NaN   \n",
       "char_2_people           object          object          object   \n",
       "char_30                   bool            bool            bool   \n",
       "char_31                   bool            bool            bool   \n",
       "char_32                   bool            bool            bool   \n",
       "char_33                   bool            bool            bool   \n",
       "char_34                   bool            bool            bool   \n",
       "char_35                   bool            bool            bool   \n",
       "char_36                   bool            bool            bool   \n",
       "char_37                   bool            bool            bool   \n",
       "char_38                  int64           int64           int64   \n",
       "char_3_act                 NaN             NaN             NaN   \n",
       "char_3_people           object          object          object   \n",
       "char_4_act                 NaN             NaN             NaN   \n",
       "char_4_people           object          object          object   \n",
       "char_5_act                 NaN             NaN             NaN   \n",
       "char_5_people           object          object          object   \n",
       "char_6_act                 NaN             NaN             NaN   \n",
       "char_6_people           object          object          object   \n",
       "char_7_act                 NaN             NaN             NaN   \n",
       "char_7_people           object          object          object   \n",
       "char_8_act                 NaN             NaN             NaN   \n",
       "char_8_people           object          object          object   \n",
       "char_9_act                 NaN             NaN             NaN   \n",
       "char_9_people           object          object          object   \n",
       "date_act        datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "date_people     datetime64[ns]  datetime64[ns]  datetime64[ns]   \n",
       "group_1                 object          object          object   \n",
       "outcome                  int64           int64           int64   \n",
       "\n",
       "                       train_6         train_7  \n",
       "char_10_act             object          object  \n",
       "char_10_people            bool            bool  \n",
       "char_11                   bool            bool  \n",
       "char_12                   bool            bool  \n",
       "char_13                   bool            bool  \n",
       "char_14                   bool            bool  \n",
       "char_15                   bool            bool  \n",
       "char_16                   bool            bool  \n",
       "char_17                   bool            bool  \n",
       "char_18                   bool            bool  \n",
       "char_19                   bool            bool  \n",
       "char_1_act                 NaN             NaN  \n",
       "char_1_people           object          object  \n",
       "char_20                   bool            bool  \n",
       "char_21                   bool            bool  \n",
       "char_22                   bool            bool  \n",
       "char_23                   bool            bool  \n",
       "char_24                   bool            bool  \n",
       "char_25                   bool            bool  \n",
       "char_26                   bool            bool  \n",
       "char_27                   bool            bool  \n",
       "char_28                   bool            bool  \n",
       "char_29                   bool            bool  \n",
       "char_2_act                 NaN             NaN  \n",
       "char_2_people           object          object  \n",
       "char_30                   bool            bool  \n",
       "char_31                   bool            bool  \n",
       "char_32                   bool            bool  \n",
       "char_33                   bool            bool  \n",
       "char_34                   bool            bool  \n",
       "char_35                   bool            bool  \n",
       "char_36                   bool            bool  \n",
       "char_37                   bool            bool  \n",
       "char_38                  int64           int64  \n",
       "char_3_act                 NaN             NaN  \n",
       "char_3_people           object          object  \n",
       "char_4_act                 NaN             NaN  \n",
       "char_4_people           object          object  \n",
       "char_5_act                 NaN             NaN  \n",
       "char_5_people           object          object  \n",
       "char_6_act                 NaN             NaN  \n",
       "char_6_people           object          object  \n",
       "char_7_act                 NaN             NaN  \n",
       "char_7_people           object          object  \n",
       "char_8_act                 NaN             NaN  \n",
       "char_8_people           object          object  \n",
       "char_9_act                 NaN             NaN  \n",
       "char_9_people           object          object  \n",
       "date_act        datetime64[ns]  datetime64[ns]  \n",
       "date_people     datetime64[ns]  datetime64[ns]  \n",
       "group_1                 object          object  \n",
       "outcome                  int64           int64  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame({'train_1':train_datas['type 1'].dtypes,'train_2':train_datas['type 2'].dtypes,\n",
    "              'train_3':train_datas['type 3'].dtypes,'train_4':train_datas['type 4'].dtypes,\n",
    "              'train_5':train_datas['type 5'].dtypes,'train_6':train_datas['type 6'].dtypes,\n",
    "              'train_7':train_datas['type 7'].dtypes,\n",
    "              'test_1':test_datas['type 1'].dtypes,'test_2':test_datas['type 2'].dtypes,\n",
    "              'test_3':test_datas['type 3'].dtypes,'test_4':test_datas['type 4'].dtypes,\n",
    "              'test_5':test_datas['type 5'].dtypes,'test_6':test_datas['type 6'].dtypes,\n",
    "              'test_7':test_datas['type 7'].dtypes,})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "str_col_list=['group_1']+['char_%d_act'%i for i in range(1,11)]+['char_%d_people'%i for i in range(1,10)]\n",
    "bool_col_list=['char_10_people']+['char_%d'%i for i in range(11,38)]\n",
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    for data_set in [train_datas,test_datas]:\n",
    "        data_set[_type].date_act= (data_set[_type].date_act- np.datetime64('1970-01-01'))/ np.timedelta64(1, 'D')\n",
    "        data_set[_type].date_people= (data_set[_type].date_people- np.datetime64('1970-01-01'))/ np.timedelta64(1,'D') \n",
    "        data_set[_type].group_1=data_set[_type].group_1.str.replace(\"group\",'').str.strip().astype(np.float64)\n",
    "        for col in bool_col_list:\n",
    "               if col in data_set[_type]:data_set[_type][col]=data_set[_type][col].astype(np.float64)\n",
    "        for col in str_col_list[1:]:\n",
    "               if col in data_set[_type]:data_set[_type][col]=data_set[_type][col].str.replace(\"type\",'').str.strip().astype(np.float64) \n",
    "\n",
    "        data_set[_type]= data_set[_type].astype(np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True,True,True,True,True,True,True,True,True,True,True,True,True,True,"
     ]
    }
   ],
   "source": [
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    print((train_datas[_type].dtypes==np.float64).all(),end=',')\n",
    "    print((test_datas[_type].dtypes==np.float64).all(),end=',')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>date_act</th>\n",
       "      <th>char_1_act</th>\n",
       "      <th>char_2_act</th>\n",
       "      <th>char_3_act</th>\n",
       "      <th>char_4_act</th>\n",
       "      <th>char_5_act</th>\n",
       "      <th>char_6_act</th>\n",
       "      <th>char_7_act</th>\n",
       "      <th>char_8_act</th>\n",
       "      <th>char_9_act</th>\n",
       "      <th>...</th>\n",
       "      <th>char_29</th>\n",
       "      <th>char_30</th>\n",
       "      <th>char_31</th>\n",
       "      <th>char_32</th>\n",
       "      <th>char_33</th>\n",
       "      <th>char_34</th>\n",
       "      <th>char_35</th>\n",
       "      <th>char_36</th>\n",
       "      <th>char_37</th>\n",
       "      <th>char_38</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>people_id</th>\n",
       "      <th>activity_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ppl_100025</th>\n",
       "      <th>act1_9923</th>\n",
       "      <td>19321.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>76.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">ppl_100033</th>\n",
       "      <th>act1_198174</th>\n",
       "      <td>19199.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>act1_214090</th>\n",
       "      <td>19523.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>act1_230588</th>\n",
       "      <td>19416.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>act1_271874</th>\n",
       "      <td>19199.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        date_act  char_1_act  char_2_act  char_3_act  \\\n",
       "people_id  activity_id                                                 \n",
       "ppl_100025 act1_9923     19321.0         3.0         5.0         1.0   \n",
       "ppl_100033 act1_198174   19199.0        36.0        11.0         5.0   \n",
       "           act1_214090   19523.0        24.0         6.0         6.0   \n",
       "           act1_230588   19416.0         2.0         2.0         3.0   \n",
       "           act1_271874   19199.0         2.0         5.0         3.0   \n",
       "\n",
       "                        char_4_act  char_5_act  char_6_act  char_7_act  \\\n",
       "people_id  activity_id                                                   \n",
       "ppl_100025 act1_9923           1.0         6.0         3.0         3.0   \n",
       "ppl_100033 act1_198174         1.0         6.0         1.0         1.0   \n",
       "           act1_214090         3.0         1.0         3.0         4.0   \n",
       "           act1_230588         3.0         5.0         2.0         2.0   \n",
       "           act1_271874         2.0         6.0         1.0         1.0   \n",
       "\n",
       "                        char_8_act  char_9_act   ...     char_29  char_30  \\\n",
       "people_id  activity_id                           ...                        \n",
       "ppl_100025 act1_9923           6.0         8.0   ...         0.0      0.0   \n",
       "ppl_100033 act1_198174         4.0         1.0   ...         0.0      0.0   \n",
       "           act1_214090         5.0         1.0   ...         0.0      0.0   \n",
       "           act1_230588         4.0         2.0   ...         0.0      0.0   \n",
       "           act1_271874         6.0         8.0   ...         0.0      0.0   \n",
       "\n",
       "                        char_31  char_32  char_33  char_34  char_35  char_36  \\\n",
       "people_id  activity_id                                                         \n",
       "ppl_100025 act1_9923        0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "ppl_100033 act1_198174      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "           act1_214090      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "           act1_230588      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "           act1_271874      0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "\n",
       "                        char_37  char_38  \n",
       "people_id  activity_id                    \n",
       "ppl_100025 act1_9923        0.0     76.0  \n",
       "ppl_100033 act1_198174      0.0      0.0  \n",
       "           act1_214090      0.0      0.0  \n",
       "           act1_230588      0.0      0.0  \n",
       "           act1_271874      0.0      0.0  \n",
       "\n",
       "[5 rows x 51 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_datas['type 1'].head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data_ta_1</th>\n",
       "      <th>len_ta_1</th>\n",
       "      <th>data_ta_2</th>\n",
       "      <th>len_ta_2</th>\n",
       "      <th>data_ta_3</th>\n",
       "      <th>len_ta_3</th>\n",
       "      <th>data_ta_4</th>\n",
       "      <th>len_ta_4</th>\n",
       "      <th>data_ta_5</th>\n",
       "      <th>len_ta_5</th>\n",
       "      <th>...</th>\n",
       "      <th>data_tt_2</th>\n",
       "      <th>len_tt_2</th>\n",
       "      <th>data_tt_3</th>\n",
       "      <th>len_tt_3</th>\n",
       "      <th>data_tt_4</th>\n",
       "      <th>len_tt_4</th>\n",
       "      <th>data_tt_5</th>\n",
       "      <th>len_tt_5</th>\n",
       "      <th>data_tt_6</th>\n",
       "      <th>len_tt_6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>char_10_act</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[ 1.]</td>\n",
       "      <td>1.0</td>\n",
       "      <td>[ 114.    2.   23.]...</td>\n",
       "      <td>450.0</td>\n",
       "      <td>[   76.  1727.   894.]...</td>\n",
       "      <td>3315.0</td>\n",
       "      <td>[ 5493.   489.   584.]...</td>\n",
       "      <td>2747.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.]</td>\n",
       "      <td>1.0</td>\n",
       "      <td>[ 114.    2.   23.]...</td>\n",
       "      <td>282.0</td>\n",
       "      <td>[   76.  1727.   894.]...</td>\n",
       "      <td>2267.0</td>\n",
       "      <td>[ 5493.   489.   584.]...</td>\n",
       "      <td>1409.0</td>\n",
       "      <td>[ 110.]</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_10_people</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_11</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_12</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_13</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_14</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_15</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_16</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_17</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_18</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_19</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_1_act</th>\n",
       "      <td>[  3.  36.  24.]...</td>\n",
       "      <td>51.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_1_people</th>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 2.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_20</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_21</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_22</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_23</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_24</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_25</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_26</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_27</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_28</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_29</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_2_act</th>\n",
       "      <td>[  5.  11.   6.]...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_2_people</th>\n",
       "      <td>[ 3.  2.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 2.  3.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 3.  2.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 2.  3.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 3.  2.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 2.  3.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 3.  2.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 2.  3.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 3.  2.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "      <td>[ 3.  2.  1.]</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_30</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_31</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_32</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_33</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_34</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_35</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_36</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_37</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_38</th>\n",
       "      <td>[  76.    0.  100.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 36.  76.  99.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 84.  76.   0.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 36.  99.  84.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 91.  76.  68.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 36.  76.  99.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 84.  76.   0.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 36.  99.  84.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 91.  76.  68.]...</td>\n",
       "      <td>101.0</td>\n",
       "      <td>[ 93.  89.   0.]...</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_3_act</th>\n",
       "      <td>[ 1.  5.  6.]...</td>\n",
       "      <td>11.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_3_people</th>\n",
       "      <td>[ 14.  10.   4.]...</td>\n",
       "      <td>42.0</td>\n",
       "      <td>[  5.  28.   4.]...</td>\n",
       "      <td>43.0</td>\n",
       "      <td>[ 40.  14.   4.]...</td>\n",
       "      <td>42.0</td>\n",
       "      <td>[  5.   4.  40.]...</td>\n",
       "      <td>42.0</td>\n",
       "      <td>[  4.  14.   5.]...</td>\n",
       "      <td>41.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[  5.  28.   4.]...</td>\n",
       "      <td>41.0</td>\n",
       "      <td>[ 40.  14.   4.]...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>[  5.   4.  40.]...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>[  4.  14.   5.]...</td>\n",
       "      <td>39.0</td>\n",
       "      <td>[  7.  15.   9.]...</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_4_act</th>\n",
       "      <td>[ 1.  3.  2.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_4_people</th>\n",
       "      <td>[  6.   7.  10.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 5.  9.  8.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 25.   6.   7.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  5.   8.  25.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 8.  6.  5.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 5.  9.  8.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 25.   6.   7.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  5.   8.  25.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 8.  6.  5.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 2.  3.  6.]...</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_5_act</th>\n",
       "      <td>[ 6.  1.  5.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_5_people</th>\n",
       "      <td>[ 8.  6.  4.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 5.  8.  9.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 9.  8.  4.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 5.  9.  8.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 4.  8.  5.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 5.  8.  9.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 9.  8.  4.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 5.  9.  8.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 4.  8.  5.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 7.  6.  5.]...</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_6_act</th>\n",
       "      <td>[ 3.  1.  2.]...</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_6_people</th>\n",
       "      <td>[ 3.  1.  2.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 3.  2.  4.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 3.  4.  1.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 3.  2.  1.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 1.  3.  2.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 3.  2.  4.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 3.  4.  1.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 3.  2.  1.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 1.  3.  2.]...</td>\n",
       "      <td>7.0</td>\n",
       "      <td>[ 2.  1.  4.]...</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_7_act</th>\n",
       "      <td>[ 3.  1.  4.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_7_people</th>\n",
       "      <td>[  9.  23.   6.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 11.   5.   9.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  8.   9.  20.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 11.   5.   8.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  7.   9.  23.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 11.   5.   9.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  8.   9.  20.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[ 11.   5.   8.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  7.   9.  23.]...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>[  6.   4.  19.]...</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_8_act</th>\n",
       "      <td>[ 6.  4.  5.]...</td>\n",
       "      <td>18.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_8_people</th>\n",
       "      <td>[ 6.  3.  2.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  3.  6.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  3.  6.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>[ 4.  2.  3.]...</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_9_act</th>\n",
       "      <td>[ 8.  1.  2.]...</td>\n",
       "      <td>19.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>char_9_people</th>\n",
       "      <td>[ 6.  3.  2.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 2.  4.  6.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 2.  3.  6.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 3.  6.  5.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 2.  4.  6.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 2.  3.  6.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 2.  6.  3.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 3.  6.  5.]...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>[ 4.  9.  3.]...</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date_act</th>\n",
       "      <td>[ 19321.  19199.  19523.]...</td>\n",
       "      <td>411.0</td>\n",
       "      <td>[ 19262.  19573.  19595.]...</td>\n",
       "      <td>386.0</td>\n",
       "      <td>[ 19200.  19442.  19230.]...</td>\n",
       "      <td>386.0</td>\n",
       "      <td>[ 19595.  19573.  19214.]...</td>\n",
       "      <td>386.0</td>\n",
       "      <td>[ 19383.  19382.  19251.]...</td>\n",
       "      <td>258.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 19262.  19573.  19595.]...</td>\n",
       "      <td>387.0</td>\n",
       "      <td>[ 19200.  19442.  19230.]...</td>\n",
       "      <td>387.0</td>\n",
       "      <td>[ 19595.  19573.  19214.]...</td>\n",
       "      <td>386.0</td>\n",
       "      <td>[ 19383.  19382.  19251.]...</td>\n",
       "      <td>250.0</td>\n",
       "      <td>[ 19416.  19413.  19314.]...</td>\n",
       "      <td>278.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date_people</th>\n",
       "      <td>[ 19230.  19199.  19014.]...</td>\n",
       "      <td>1189.0</td>\n",
       "      <td>[ 18807.  18633.  19153.]...</td>\n",
       "      <td>1195.0</td>\n",
       "      <td>[ 19200.  19442.  19230.]...</td>\n",
       "      <td>1191.0</td>\n",
       "      <td>[ 18807.  19153.  19200.]...</td>\n",
       "      <td>1189.0</td>\n",
       "      <td>[ 19381.  19230.  19215.]...</td>\n",
       "      <td>986.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 18807.  18633.  19153.]...</td>\n",
       "      <td>1165.0</td>\n",
       "      <td>[ 19200.  19442.  19230.]...</td>\n",
       "      <td>1106.0</td>\n",
       "      <td>[ 18807.  19153.  19200.]...</td>\n",
       "      <td>1116.0</td>\n",
       "      <td>[ 19381.  19230.  19215.]...</td>\n",
       "      <td>763.0</td>\n",
       "      <td>[ 19415.  19287.  19313.]...</td>\n",
       "      <td>441.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>group_1</th>\n",
       "      <td>[ 36096.  17304.   9439.]...</td>\n",
       "      <td>17008.0</td>\n",
       "      <td>[ 17304.   8688.  33592.]...</td>\n",
       "      <td>23030.0</td>\n",
       "      <td>[  6534.  45749.  36096.]...</td>\n",
       "      <td>18955.0</td>\n",
       "      <td>[ 17304.  33592.   6534.]...</td>\n",
       "      <td>15958.0</td>\n",
       "      <td>[  4204.  36096.  19662.]...</td>\n",
       "      <td>9231.0</td>\n",
       "      <td>...</td>\n",
       "      <td>[ 17304.   8688.  33592.]...</td>\n",
       "      <td>8532.0</td>\n",
       "      <td>[  6534.  45749.  36096.]...</td>\n",
       "      <td>7216.0</td>\n",
       "      <td>[ 17304.  33592.   6534.]...</td>\n",
       "      <td>5592.0</td>\n",
       "      <td>[  4204.  36096.  19662.]...</td>\n",
       "      <td>3370.0</td>\n",
       "      <td>[ 18966.  38766.  17304.]...</td>\n",
       "      <td>714.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>outcome</th>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 0.  1.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>[ 1.  0.]</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>52 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   data_ta_1  len_ta_1  \\\n",
       "char_10_act                              NaN       NaN   \n",
       "char_10_people                     [ 0.  1.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 0.  1.]       2.0   \n",
       "char_14                            [ 0.  1.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 0.  1.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act               [  3.  36.  24.]...      51.0   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 0.  1.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 0.  1.]       2.0   \n",
       "char_28                            [ 0.  1.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act               [  5.  11.   6.]...      32.0   \n",
       "char_2_people                  [ 3.  2.  1.]       3.0   \n",
       "char_30                            [ 0.  1.]       2.0   \n",
       "char_31                            [ 0.  1.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 0.  1.]       2.0   \n",
       "char_35                            [ 0.  1.]       2.0   \n",
       "char_36                            [ 0.  1.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38               [  76.    0.  100.]...     101.0   \n",
       "char_3_act                  [ 1.  5.  6.]...      11.0   \n",
       "char_3_people            [ 14.  10.   4.]...      42.0   \n",
       "char_4_act                  [ 1.  3.  2.]...       7.0   \n",
       "char_4_people            [  6.   7.  10.]...      25.0   \n",
       "char_5_act                  [ 6.  1.  5.]...       7.0   \n",
       "char_5_people               [ 8.  6.  4.]...       9.0   \n",
       "char_6_act                  [ 3.  1.  2.]...       5.0   \n",
       "char_6_people               [ 3.  1.  2.]...       7.0   \n",
       "char_7_act                  [ 3.  1.  4.]...       8.0   \n",
       "char_7_people            [  9.  23.   6.]...      25.0   \n",
       "char_8_act                  [ 6.  4.  5.]...      18.0   \n",
       "char_8_people               [ 6.  3.  2.]...       8.0   \n",
       "char_9_act                  [ 8.  1.  2.]...      19.0   \n",
       "char_9_people               [ 6.  3.  2.]...       9.0   \n",
       "date_act        [ 19321.  19199.  19523.]...     411.0   \n",
       "date_people     [ 19230.  19199.  19014.]...    1189.0   \n",
       "group_1         [ 36096.  17304.   9439.]...   17008.0   \n",
       "outcome                            [ 0.  1.]       2.0   \n",
       "\n",
       "                                   data_ta_2  len_ta_2  \\\n",
       "char_10_act                            [ 1.]       1.0   \n",
       "char_10_people                     [ 1.  0.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 1.  0.]       2.0   \n",
       "char_14                            [ 1.  0.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 1.  0.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 1.  0.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 1.  0.]       2.0   \n",
       "char_28                            [ 1.  0.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 2.  3.  1.]       3.0   \n",
       "char_30                            [ 1.  0.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 1.  0.]       2.0   \n",
       "char_35                            [ 1.  0.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38                  [ 36.  76.  99.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [  5.  28.   4.]...      43.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people               [ 5.  9.  8.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 5.  8.  9.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 3.  2.  4.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [ 11.   5.   9.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  6.  3.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 2.  4.  6.]...       9.0   \n",
       "date_act        [ 19262.  19573.  19595.]...     386.0   \n",
       "date_people     [ 18807.  18633.  19153.]...    1195.0   \n",
       "group_1         [ 17304.   8688.  33592.]...   23030.0   \n",
       "outcome                            [ 0.  1.]       2.0   \n",
       "\n",
       "                                   data_ta_3  len_ta_3  \\\n",
       "char_10_act           [ 114.    2.   23.]...     450.0   \n",
       "char_10_people                     [ 0.  1.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 0.  1.]       2.0   \n",
       "char_14                            [ 0.  1.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 0.  1.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 0.  1.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 0.  1.]       2.0   \n",
       "char_28                            [ 0.  1.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 3.  2.  1.]       3.0   \n",
       "char_30                            [ 0.  1.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 0.  1.]       2.0   \n",
       "char_35                            [ 1.  0.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38                  [ 84.  76.   0.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [ 40.  14.   4.]...      42.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people            [ 25.   6.   7.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 9.  8.  4.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 3.  4.  1.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [  8.   9.  20.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  3.  6.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 2.  3.  6.]...       9.0   \n",
       "date_act        [ 19200.  19442.  19230.]...     386.0   \n",
       "date_people     [ 19200.  19442.  19230.]...    1191.0   \n",
       "group_1         [  6534.  45749.  36096.]...   18955.0   \n",
       "outcome                            [ 1.  0.]       2.0   \n",
       "\n",
       "                                   data_ta_4  len_ta_4  \\\n",
       "char_10_act        [   76.  1727.   894.]...    3315.0   \n",
       "char_10_people                     [ 1.  0.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 1.  0.]       2.0   \n",
       "char_14                            [ 1.  0.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 1.  0.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 1.  0.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 1.  0.]       2.0   \n",
       "char_28                            [ 1.  0.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 2.  3.  1.]       3.0   \n",
       "char_30                            [ 1.  0.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 1.  0.]       2.0   \n",
       "char_35                            [ 1.  0.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38                  [ 36.  99.  84.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [  5.   4.  40.]...      42.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people            [  5.   8.  25.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 5.  9.  8.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 3.  2.  1.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [ 11.   5.   8.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  6.  3.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 2.  6.  3.]...       9.0   \n",
       "date_act        [ 19595.  19573.  19214.]...     386.0   \n",
       "date_people     [ 18807.  19153.  19200.]...    1189.0   \n",
       "group_1         [ 17304.  33592.   6534.]...   15958.0   \n",
       "outcome                            [ 0.  1.]       2.0   \n",
       "\n",
       "                                   data_ta_5  len_ta_5    ...     \\\n",
       "char_10_act        [ 5493.   489.   584.]...    2747.0    ...      \n",
       "char_10_people                     [ 1.  0.]       2.0    ...      \n",
       "char_11                            [ 0.  1.]       2.0    ...      \n",
       "char_12                            [ 1.  0.]       2.0    ...      \n",
       "char_13                            [ 1.  0.]       2.0    ...      \n",
       "char_14                            [ 1.  0.]       2.0    ...      \n",
       "char_15                            [ 1.  0.]       2.0    ...      \n",
       "char_16                            [ 1.  0.]       2.0    ...      \n",
       "char_17                            [ 1.  0.]       2.0    ...      \n",
       "char_18                            [ 1.  0.]       2.0    ...      \n",
       "char_19                            [ 1.  0.]       2.0    ...      \n",
       "char_1_act                               NaN       NaN    ...      \n",
       "char_1_people                      [ 2.  1.]       2.0    ...      \n",
       "char_20                            [ 1.  0.]       2.0    ...      \n",
       "char_21                            [ 1.  0.]       2.0    ...      \n",
       "char_22                            [ 1.  0.]       2.0    ...      \n",
       "char_23                            [ 1.  0.]       2.0    ...      \n",
       "char_24                            [ 1.  0.]       2.0    ...      \n",
       "char_25                            [ 0.  1.]       2.0    ...      \n",
       "char_26                            [ 0.  1.]       2.0    ...      \n",
       "char_27                            [ 1.  0.]       2.0    ...      \n",
       "char_28                            [ 1.  0.]       2.0    ...      \n",
       "char_29                            [ 0.  1.]       2.0    ...      \n",
       "char_2_act                               NaN       NaN    ...      \n",
       "char_2_people                  [ 3.  2.  1.]       3.0    ...      \n",
       "char_30                            [ 0.  1.]       2.0    ...      \n",
       "char_31                            [ 1.  0.]       2.0    ...      \n",
       "char_32                            [ 1.  0.]       2.0    ...      \n",
       "char_33                            [ 1.  0.]       2.0    ...      \n",
       "char_34                            [ 1.  0.]       2.0    ...      \n",
       "char_35                            [ 0.  1.]       2.0    ...      \n",
       "char_36                            [ 1.  0.]       2.0    ...      \n",
       "char_37                            [ 1.  0.]       2.0    ...      \n",
       "char_38                  [ 91.  76.  68.]...     101.0    ...      \n",
       "char_3_act                               NaN       NaN    ...      \n",
       "char_3_people            [  4.  14.   5.]...      41.0    ...      \n",
       "char_4_act                               NaN       NaN    ...      \n",
       "char_4_people               [ 8.  6.  5.]...      25.0    ...      \n",
       "char_5_act                               NaN       NaN    ...      \n",
       "char_5_people               [ 4.  8.  5.]...       9.0    ...      \n",
       "char_6_act                               NaN       NaN    ...      \n",
       "char_6_people               [ 1.  3.  2.]...       7.0    ...      \n",
       "char_7_act                               NaN       NaN    ...      \n",
       "char_7_people            [  7.   9.  23.]...      25.0    ...      \n",
       "char_8_act                               NaN       NaN    ...      \n",
       "char_8_people               [ 2.  6.  3.]...       8.0    ...      \n",
       "char_9_act                               NaN       NaN    ...      \n",
       "char_9_people               [ 3.  6.  5.]...       9.0    ...      \n",
       "date_act        [ 19383.  19382.  19251.]...     258.0    ...      \n",
       "date_people     [ 19381.  19230.  19215.]...     986.0    ...      \n",
       "group_1         [  4204.  36096.  19662.]...    9231.0    ...      \n",
       "outcome                            [ 1.  0.]       2.0    ...      \n",
       "\n",
       "                                   data_tt_2  len_tt_2  \\\n",
       "char_10_act                            [ 1.]       1.0   \n",
       "char_10_people                     [ 1.  0.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 1.  0.]       2.0   \n",
       "char_14                            [ 1.  0.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 1.  0.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 1.  0.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 1.  0.]       2.0   \n",
       "char_28                            [ 1.  0.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 2.  3.  1.]       3.0   \n",
       "char_30                            [ 1.  0.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 1.  0.]       2.0   \n",
       "char_35                            [ 1.  0.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38                  [ 36.  76.  99.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [  5.  28.   4.]...      41.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people               [ 5.  9.  8.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 5.  8.  9.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 3.  2.  4.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [ 11.   5.   9.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  6.  3.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 2.  4.  6.]...       9.0   \n",
       "date_act        [ 19262.  19573.  19595.]...     387.0   \n",
       "date_people     [ 18807.  18633.  19153.]...    1165.0   \n",
       "group_1         [ 17304.   8688.  33592.]...    8532.0   \n",
       "outcome                                  NaN       NaN   \n",
       "\n",
       "                                   data_tt_3  len_tt_3  \\\n",
       "char_10_act           [ 114.    2.   23.]...     282.0   \n",
       "char_10_people                     [ 0.  1.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 0.  1.]       2.0   \n",
       "char_14                            [ 0.  1.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 0.  1.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 0.  1.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 0.  1.]       2.0   \n",
       "char_28                            [ 0.  1.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 3.  2.  1.]       3.0   \n",
       "char_30                            [ 0.  1.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 0.  1.]       2.0   \n",
       "char_35                            [ 1.  0.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38                  [ 84.  76.   0.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [ 40.  14.   4.]...      40.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people            [ 25.   6.   7.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 9.  8.  4.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 3.  4.  1.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [  8.   9.  20.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  3.  6.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 2.  3.  6.]...       9.0   \n",
       "date_act        [ 19200.  19442.  19230.]...     387.0   \n",
       "date_people     [ 19200.  19442.  19230.]...    1106.0   \n",
       "group_1         [  6534.  45749.  36096.]...    7216.0   \n",
       "outcome                                  NaN       NaN   \n",
       "\n",
       "                                   data_tt_4  len_tt_4  \\\n",
       "char_10_act        [   76.  1727.   894.]...    2267.0   \n",
       "char_10_people                     [ 1.  0.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 0.  1.]       2.0   \n",
       "char_13                            [ 1.  0.]       2.0   \n",
       "char_14                            [ 1.  0.]       2.0   \n",
       "char_15                            [ 0.  1.]       2.0   \n",
       "char_16                            [ 1.  0.]       2.0   \n",
       "char_17                            [ 0.  1.]       2.0   \n",
       "char_18                            [ 0.  1.]       2.0   \n",
       "char_19                            [ 0.  1.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 0.  1.]       2.0   \n",
       "char_21                            [ 1.  0.]       2.0   \n",
       "char_22                            [ 0.  1.]       2.0   \n",
       "char_23                            [ 0.  1.]       2.0   \n",
       "char_24                            [ 0.  1.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 1.  0.]       2.0   \n",
       "char_28                            [ 1.  0.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 2.  3.  1.]       3.0   \n",
       "char_30                            [ 1.  0.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 0.  1.]       2.0   \n",
       "char_33                            [ 0.  1.]       2.0   \n",
       "char_34                            [ 1.  0.]       2.0   \n",
       "char_35                            [ 1.  0.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 0.  1.]       2.0   \n",
       "char_38                  [ 36.  99.  84.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [  5.   4.  40.]...      40.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people            [  5.   8.  25.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 5.  9.  8.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 3.  2.  1.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [ 11.   5.   8.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  6.  3.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 2.  6.  3.]...       9.0   \n",
       "date_act        [ 19595.  19573.  19214.]...     386.0   \n",
       "date_people     [ 18807.  19153.  19200.]...    1116.0   \n",
       "group_1         [ 17304.  33592.   6534.]...    5592.0   \n",
       "outcome                                  NaN       NaN   \n",
       "\n",
       "                                   data_tt_5  len_tt_5  \\\n",
       "char_10_act        [ 5493.   489.   584.]...    1409.0   \n",
       "char_10_people                     [ 1.  0.]       2.0   \n",
       "char_11                            [ 0.  1.]       2.0   \n",
       "char_12                            [ 1.  0.]       2.0   \n",
       "char_13                            [ 1.  0.]       2.0   \n",
       "char_14                            [ 1.  0.]       2.0   \n",
       "char_15                            [ 1.  0.]       2.0   \n",
       "char_16                            [ 1.  0.]       2.0   \n",
       "char_17                            [ 1.  0.]       2.0   \n",
       "char_18                            [ 1.  0.]       2.0   \n",
       "char_19                            [ 1.  0.]       2.0   \n",
       "char_1_act                               NaN       NaN   \n",
       "char_1_people                      [ 2.  1.]       2.0   \n",
       "char_20                            [ 1.  0.]       2.0   \n",
       "char_21                            [ 1.  0.]       2.0   \n",
       "char_22                            [ 1.  0.]       2.0   \n",
       "char_23                            [ 1.  0.]       2.0   \n",
       "char_24                            [ 1.  0.]       2.0   \n",
       "char_25                            [ 0.  1.]       2.0   \n",
       "char_26                            [ 0.  1.]       2.0   \n",
       "char_27                            [ 1.  0.]       2.0   \n",
       "char_28                            [ 1.  0.]       2.0   \n",
       "char_29                            [ 0.  1.]       2.0   \n",
       "char_2_act                               NaN       NaN   \n",
       "char_2_people                  [ 3.  2.  1.]       3.0   \n",
       "char_30                            [ 0.  1.]       2.0   \n",
       "char_31                            [ 1.  0.]       2.0   \n",
       "char_32                            [ 1.  0.]       2.0   \n",
       "char_33                            [ 1.  0.]       2.0   \n",
       "char_34                            [ 1.  0.]       2.0   \n",
       "char_35                            [ 0.  1.]       2.0   \n",
       "char_36                            [ 1.  0.]       2.0   \n",
       "char_37                            [ 1.  0.]       2.0   \n",
       "char_38                  [ 91.  76.  68.]...     101.0   \n",
       "char_3_act                               NaN       NaN   \n",
       "char_3_people            [  4.  14.   5.]...      39.0   \n",
       "char_4_act                               NaN       NaN   \n",
       "char_4_people               [ 8.  6.  5.]...      25.0   \n",
       "char_5_act                               NaN       NaN   \n",
       "char_5_people               [ 4.  8.  5.]...       9.0   \n",
       "char_6_act                               NaN       NaN   \n",
       "char_6_people               [ 1.  3.  2.]...       7.0   \n",
       "char_7_act                               NaN       NaN   \n",
       "char_7_people            [  7.   9.  23.]...      25.0   \n",
       "char_8_act                               NaN       NaN   \n",
       "char_8_people               [ 2.  6.  3.]...       8.0   \n",
       "char_9_act                               NaN       NaN   \n",
       "char_9_people               [ 3.  6.  5.]...       9.0   \n",
       "date_act        [ 19383.  19382.  19251.]...     250.0   \n",
       "date_people     [ 19381.  19230.  19215.]...     763.0   \n",
       "group_1         [  4204.  36096.  19662.]...    3370.0   \n",
       "outcome                                  NaN       NaN   \n",
       "\n",
       "                                   data_tt_6  len_tt_6  \n",
       "char_10_act                          [ 110.]       1.0  \n",
       "char_10_people                     [ 0.  1.]       2.0  \n",
       "char_11                            [ 1.  0.]       2.0  \n",
       "char_12                            [ 1.  0.]       2.0  \n",
       "char_13                            [ 1.  0.]       2.0  \n",
       "char_14                            [ 1.  0.]       2.0  \n",
       "char_15                            [ 0.  1.]       2.0  \n",
       "char_16                            [ 0.  1.]       2.0  \n",
       "char_17                            [ 1.  0.]       2.0  \n",
       "char_18                            [ 0.  1.]       2.0  \n",
       "char_19                            [ 1.  0.]       2.0  \n",
       "char_1_act                               NaN       NaN  \n",
       "char_1_people                      [ 2.  1.]       2.0  \n",
       "char_20                            [ 0.  1.]       2.0  \n",
       "char_21                            [ 0.  1.]       2.0  \n",
       "char_22                            [ 1.  0.]       2.0  \n",
       "char_23                            [ 1.  0.]       2.0  \n",
       "char_24                            [ 0.  1.]       2.0  \n",
       "char_25                            [ 1.  0.]       2.0  \n",
       "char_26                            [ 0.  1.]       2.0  \n",
       "char_27                            [ 0.  1.]       2.0  \n",
       "char_28                            [ 0.  1.]       2.0  \n",
       "char_29                            [ 1.  0.]       2.0  \n",
       "char_2_act                               NaN       NaN  \n",
       "char_2_people                  [ 3.  2.  1.]       3.0  \n",
       "char_30                            [ 0.  1.]       2.0  \n",
       "char_31                            [ 1.  0.]       2.0  \n",
       "char_32                            [ 1.  0.]       2.0  \n",
       "char_33                            [ 0.  1.]       2.0  \n",
       "char_34                            [ 1.  0.]       2.0  \n",
       "char_35                            [ 0.  1.]       2.0  \n",
       "char_36                            [ 0.  1.]       2.0  \n",
       "char_37                            [ 0.  1.]       2.0  \n",
       "char_38                  [ 93.  89.   0.]...      95.0  \n",
       "char_3_act                               NaN       NaN  \n",
       "char_3_people            [  7.  15.   9.]...      29.0  \n",
       "char_4_act                               NaN       NaN  \n",
       "char_4_people               [ 2.  3.  6.]...      23.0  \n",
       "char_5_act                               NaN       NaN  \n",
       "char_5_people               [ 7.  6.  5.]...       9.0  \n",
       "char_6_act                               NaN       NaN  \n",
       "char_6_people               [ 2.  1.  4.]...       7.0  \n",
       "char_7_act                               NaN       NaN  \n",
       "char_7_people            [  6.   4.  19.]...      25.0  \n",
       "char_8_act                               NaN       NaN  \n",
       "char_8_people               [ 4.  2.  3.]...       8.0  \n",
       "char_9_act                               NaN       NaN  \n",
       "char_9_people               [ 4.  9.  3.]...       9.0  \n",
       "date_act        [ 19416.  19413.  19314.]...     278.0  \n",
       "date_people     [ 19415.  19287.  19313.]...     441.0  \n",
       "group_1         [ 18966.  38766.  17304.]...     714.0  \n",
       "outcome                                  NaN       NaN  \n",
       "\n",
       "[52 rows x 28 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lambda_len=lambda x:len(x.unique())\n",
    "lambda_data=lambda x:str(x.unique()) if(len(x.unique())<=3) else str(x.unique()[:3])+'...'\n",
    "train_results={}\n",
    "test_results={}\n",
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    train_results[_type[-1]]=pd.DataFrame({'len':train_datas[_type].apply(lambda_len),\n",
    "                        'data':train_datas[_type].apply(lambda_data)},\n",
    "                        index=train_datas[_type].columns) \n",
    "    test_results[_type[-1]]=pd.DataFrame({'len':test_datas[_type].apply(lambda_len),\n",
    "                        'data':train_datas[_type].apply(lambda_data)},\n",
    "                        index=test_datas[_type].columns) \n",
    "\n",
    "train_12=train_results['1'].merge(train_results['2'],how='outer',left_index=True,right_index=True,suffixes=('_ta_1', '_ta_2')) \n",
    "train_34=train_results['3'].merge(train_results['4'],how='outer',left_index=True,right_index=True,suffixes=('_ta_3', '_ta_4')) \n",
    "train_56=train_results['5'].merge(train_results['6'],how='outer',left_index=True,right_index=True,suffixes=('_ta_5', '_ta_6')) \n",
    "train_test_77=train_results['7'].merge(test_results['7'],how='outer',left_index=True,right_index=True,suffixes=('_ta_7', '_tt_7')) \n",
    "test_12=test_results['1'].merge(test_results['2'],how='outer',left_index=True,right_index=True,suffixes=('_tt_1', '_tt_2')) \n",
    "test_34=test_results['3'].merge(test_results['4'],how='outer',left_index=True,right_index=True,suffixes=('_tt_3', '_tt_4')) \n",
    "test_56=test_results['5'].merge(test_results['6'],how='outer',left_index=True,right_index=True,suffixes=('_tt_5', '_tt_6')) \n",
    "\n",
    "train_12.merge(train_34,how='outer',left_index=True,right_index=True)\\\n",
    "    .merge(train_56,how='outer',left_index=True,right_index=True)  \\\n",
    "    .merge(train_test_77,how='outer',left_index=True,right_index=True)\\\n",
    "    .merge(test_12,how='outer',left_index=True,right_index=True) \\\n",
    "    .merge(test_34,how='outer',left_index=True,right_index=True) \\\n",
    "    .merge(test_56,how='outer',left_index=True,right_index=True)       "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from scipy.sparse import hstack,csr_matrix\n",
    "from sklearn.preprocessing  import OneHotEncoder\n",
    "def onehot_encode(train_datas,test_datas): \n",
    "\n",
    "    train_results={}\n",
    "    test_results={}\n",
    "    types=['type %d'%i for i in range(1,8)]\n",
    "    for _type in types:\n",
    "        if _type=='type 1':\n",
    "            one_hot_cols=['char_%d_act'%i for i in range(1,10)]+\\\n",
    "            ['char_%d_people'%i for i in range(1,10)]\n",
    "            train_end_cols=['group_1','date_act','date_people','char_38','outcome']\n",
    "            test_end_cols=['group_1','date_act','date_people','char_38']\n",
    "        else:\n",
    "            one_hot_cols=['char_%d_people'%i for i in range(1,10)]\n",
    "            train_end_cols=['group_1','char_10_act','date_act','date_people','char_38','outcome']\n",
    "            test_end_cols=['group_1','char_10_act','date_act','date_people','char_38']\n",
    "        \n",
    "        train_front_array=train_datas[_type][one_hot_cols].values #头部数组\n",
    "        train_end_array=train_datas[_type][train_end_cols].values#末尾数组\n",
    "        train_middle_array=train_datas[_type].drop(train_end_cols+one_hot_cols,axis=1,inplace=False).values#中间数组\n",
    "        \n",
    "        test_front_array=test_datas[_type][one_hot_cols].values #头部数组\n",
    "        test_end_array=test_datas[_type][test_end_cols].values#末尾数组\n",
    "        test_middle_array=test_datas[_type].drop(test_end_cols+one_hot_cols,axis=1,inplace=False).values#中间数组\n",
    "\n",
    "        encoder=OneHotEncoder(categorical_features='all',sparse=True) # 一个稀疏矩阵，类型为 csr_matrix\n",
    "        train_result=hstack([encoder.fit_transform(train_front_array),csr_matrix(train_middle_array),csr_matrix(train_end_array)])\n",
    "        test_result=hstack([encoder.transform(test_front_array),csr_matrix(test_middle_array),csr_matrix(test_end_array)])\n",
    "        train_results[_type]=train_result\n",
    "        test_results[_type]=test_result\n",
    "    return train_results,test_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "before encode:\n",
      "\n",
      "train(type=type 1):shape= (157615, 51)\n",
      "test(type=type 1):shape= (40092, 50)\n",
      "train(type=type 2):shape= (904683, 43)\n",
      "test(type=type 2):shape= (223164, 42)\n",
      "train(type=type 3):shape= (429408, 43)\n",
      "test(type=type 3):shape= (59931, 42)\n",
      "train(type=type 4):shape= (207465, 43)\n",
      "test(type=type 4):shape= (50215, 42)\n",
      "train(type=type 5):shape= (490710, 43)\n",
      "test(type=type 5):shape= (123463, 42)\n",
      "train(type=type 6):shape= (4253, 43)\n",
      "test(type=type 6):shape= (1051, 42)\n",
      "train(type=type 7):shape= (3157, 43)\n",
      "test(type=type 7):shape= (771, 42)\n",
      "==============\n",
      "\n",
      "\n",
      "after encode:\n",
      "\n",
      "train(type=type 1):shape= (157615, 321)\n",
      "test(type=type 1):shape= (40092, 320)\n",
      "train(type=type 2):shape= (904683, 165)\n",
      "test(type=type 2):shape= (223164, 164)\n",
      "train(type=type 3):shape= (429408, 164)\n",
      "test(type=type 3):shape= (59931, 163)\n",
      "train(type=type 4):shape= (207465, 164)\n",
      "test(type=type 4):shape= (50215, 163)\n",
      "train(type=type 5):shape= (490710, 163)\n",
      "test(type=type 5):shape= (123463, 162)\n",
      "train(type=type 6):shape= (4253, 155)\n",
      "test(type=type 6):shape= (1051, 154)\n",
      "train(type=type 7):shape= (3157, 161)\n",
      "test(type=type 7):shape= (771, 160)\n",
      "==============\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "types=['type %d'%i for i in range(1,8)]\n",
    "\n",
    "print('before encode:\\n')\n",
    "for _type in types:\n",
    "    print('train(type=%s):shape='%_type,train_datas[_type].shape)\n",
    "    print('test(type=%s):shape='%_type,test_datas[_type].shape)\n",
    "print('==============\\n\\n')    \n",
    "train_results,test_results=onehot_encode(train_datas,test_datas)\n",
    "print('after encode:\\n')\n",
    "for _type in types:\n",
    "    print('train(type=%s):shape='%_type,train_results[_type].shape)\n",
    "    print('test(type=%s):shape='%_type,test_results[_type].shape)\n",
    "print('==============\\n\\n') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing  import MaxAbsScaler\n",
    "def scale(train_datas,test_datas): \n",
    "    train_results={}\n",
    "    test_results={}\n",
    "    types=['type %d'%i for i in range(1,8)]\n",
    "    \n",
    "    for _type in types:\n",
    "        if _type=='type 1':\n",
    "            train_last_index=5#最后5列为 group_1/date_act/date_people/char_38/outcome\n",
    "            test_last_index=4#最后4列为 group_1/date_act/date_people/char_38 \n",
    "        else:\n",
    "            train_last_index=6#最后6列为 group_1/char_10_act/date_act/date_people/char_38/outcome\n",
    "            test_last_index=5#最后5列为 group_1/char_10_act/date_act/date_people/char_38 \n",
    "        \n",
    "        scaler=MaxAbsScaler()\n",
    "        train_array=train_datas[_type].toarray()        \n",
    "        train_front=train_array[:,:-train_last_index]\n",
    "        train_mid=scaler.fit_transform(train_array[:,-train_last_index:-1])#outcome 不需要归一化\n",
    "        train_end=train_array[:,-1].reshape((-1,1)) #outcome\n",
    "        train_results[_type]=np.hstack((train_front,train_mid,train_end))\n",
    "        \n",
    "        test_array=test_datas[_type].toarray()\n",
    "        test_front=test_array[:,:-test_last_index]\n",
    "        test_end=scaler.transform(test_array[:,-test_last_index:])\n",
    "        test_results[_type]=np.hstack((test_front,test_end))\n",
    "\n",
    "    return train_results,test_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train(type=type 1): [ 1.] [ 0.]\n",
      "Test(type=type 1): [ 1.] [ 0.]\n",
      "Train(type=type 2): [ 1.] [ 0.]\n",
      "Test(type=type 2): [ 1.] [ 0.]\n",
      "Train(type=type 3): [ 1.] [ 0.]\n",
      "Test(type=type 3): [ 1.] [ 0.]\n",
      "Train(type=type 4): [ 1.] [ 0.]\n",
      "Test(type=type 4): [ 1.] [ 0.]\n",
      "Train(type=type 5): [ 1.] [ 0.]\n",
      "Test(type=type 5): [ 1.] [ 0.]\n",
      "Train(type=type 6): [ 1.] [ 0.]\n",
      "Test(type=type 6): [ 1.] [ 0.]\n",
      "Train(type=type 7): [ 1.] [ 0.]\n",
      "Test(type=type 7): [ 1.] [ 0.]\n"
     ]
    }
   ],
   "source": [
    "ta_results,tt_results=scale(train_results,test_results)\n",
    "types=['type %d'%i for i in range(1,8)]\n",
    "for _type in types:\n",
    "    print(\"Train(type=%s):\"%_type,np.unique(ta_results[_type].max(axis=1)),np.unique(ta_results[_type].min(axis=1)))\n",
    "    print(\"Test(type=%s):\"%_type,np.unique(tt_results[_type].max(axis=1)),np.unique(tt_results[_type].min(axis=1))) "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
